from typing import List

from swift.llm import InferClient, InferRequest


def infer_batch(engine: InferClient, infer_requests: List[InferRequest]) -> None:
    """Run one batched inference call and print every query/response pair.

    Args:
        engine: Client whose ``infer`` method is called once with the whole
            batch.
        infer_requests: Requests to send. Any number of requests is
            accepted; an empty list returns immediately without calling the
            engine.

    For each request, the ``content`` of its first message is printed as
    ``query<i>``, followed by the message content of the first choice of the
    response at the same position as ``response<i>``.
    """
    if not infer_requests:
        # Nothing to send, so skip the call to the engine entirely.
        return

    resp_list = engine.infer(infer_requests)

    # Pair requests and responses by position rather than hard-coding
    # indices 0 and 1, so batches of any size are handled.
    # NOTE(review): assumes responses come back in request order, as the
    # original index-based code did -- confirm against the client docs.
    # ``zip`` stops at the shorter of the two sequences.
    for idx, (request, resp) in enumerate(zip(infer_requests, resp_list)):
        query = request.messages[0]["content"]
        print(f"query{idx}: {query}")
        print(f"response{idx}: {resp.choices[0].message.content}")


if __name__ == "__main__":
    # Create a client pointed at a server on the local machine, port 8000.
    client = InferClient(host="127.0.0.1", port=8000)
    print(f"models: {client.models}")

    # Two single-turn user prompts sent together as one batch
    # (roughly: "The weather is really nice today" and "What bad luck").
    prompts = ("今天天气真好呀", "真倒霉")
    batch = [
        InferRequest(messages=[{"role": "user", "content": prompt}])
        for prompt in prompts
    ]
    infer_batch(client, batch)
